# Importing and installing libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
!pip install seaborn
import seaborn as sns
!pip install plotly==4.14.3
import plotly.graph_objects as go
import warnings
Requirement already satisfied: seaborn in ./opt/anaconda3/lib/python3.9/site-packages (0.11.2) Requirement already satisfied: numpy>=1.15 in ./opt/anaconda3/lib/python3.9/site-packages (from seaborn) (1.21.5) Requirement already satisfied: matplotlib>=2.2 in ./opt/anaconda3/lib/python3.9/site-packages (from seaborn) (3.5.1) Requirement already satisfied: scipy>=1.0 in ./opt/anaconda3/lib/python3.9/site-packages (from seaborn) (1.7.3) Requirement already satisfied: pandas>=0.23 in ./opt/anaconda3/lib/python3.9/site-packages (from seaborn) (1.4.2) Requirement already satisfied: packaging>=20.0 in ./opt/anaconda3/lib/python3.9/site-packages (from matplotlib>=2.2->seaborn) (21.3) Requirement already satisfied: python-dateutil>=2.7 in ./opt/anaconda3/lib/python3.9/site-packages (from matplotlib>=2.2->seaborn) (2.8.2) Requirement already satisfied: kiwisolver>=1.0.1 in ./opt/anaconda3/lib/python3.9/site-packages (from matplotlib>=2.2->seaborn) (1.3.2) Requirement already satisfied: pyparsing>=2.2.1 in ./opt/anaconda3/lib/python3.9/site-packages (from matplotlib>=2.2->seaborn) (3.0.4) Requirement already satisfied: fonttools>=4.22.0 in ./opt/anaconda3/lib/python3.9/site-packages (from matplotlib>=2.2->seaborn) (4.25.0) Requirement already satisfied: pillow>=6.2.0 in ./opt/anaconda3/lib/python3.9/site-packages (from matplotlib>=2.2->seaborn) (9.0.1) Requirement already satisfied: cycler>=0.10 in ./opt/anaconda3/lib/python3.9/site-packages (from matplotlib>=2.2->seaborn) (0.11.0) Requirement already satisfied: pytz>=2020.1 in ./opt/anaconda3/lib/python3.9/site-packages (from pandas>=0.23->seaborn) (2021.3) Requirement already satisfied: six>=1.5 in ./opt/anaconda3/lib/python3.9/site-packages (from python-dateutil>=2.7->matplotlib>=2.2->seaborn) (1.16.0) Requirement already satisfied: plotly==4.14.3 in ./opt/anaconda3/lib/python3.9/site-packages (4.14.3) Requirement already satisfied: six in ./opt/anaconda3/lib/python3.9/site-packages (from plotly==4.14.3) (1.16.0) 
Requirement already satisfied: retrying>=1.3.3 in ./opt/anaconda3/lib/python3.9/site-packages (from plotly==4.14.3) (1.3.3)
# Importing datasets
# All four yearly files sit in the same folder, so the folder is named once and
# every file path is built from it; change DATA_DIR to run the notebook elsewhere.
DATA_DIR = "/Users/Admin/Desktop/India Startup Funding"
# The 2018 file is an Excel workbook; 2019-2021 are CSV files.
Data_2018 = pd.read_excel(f"{DATA_DIR}/startup_funding2018.xls")
Data_2019 = pd.read_csv(f"{DATA_DIR}/startup_funding2019.csv")
Data_2020 = pd.read_csv(f"{DATA_DIR}/startup_funding2020.csv")
Data_2021 = pd.read_csv(f"{DATA_DIR}/startup_funding2021.csv")
# Tagging every yearly dataset with a Funding Year column holding its year
for _year, _frame in zip((2018, 2019, 2020, 2021), (Data_2018, Data_2019, Data_2020, Data_2021)):
    _frame["Funding Year"] = _year
# Previewing the first five rows of the 2018 dataset
Data_2018.head(5)
| Company Name | Industry | Round/Series | Amount | Location | About Company | Calculation | Amount.1 | Funding Year | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | TheCollegeFever | Brand Marketing, Event Promotion, Marketing, S... | Seed | 250000 | Bangalore, Karnataka, India | TheCollegeFever is a hub for fun, fiesta and f... | 0.0157 | NaN | 2018 |
| 1 | Happy Cow Dairy | Agriculture, Farming | Seed | 628000 | Mumbai, Maharashtra, India | A startup which aggregates milk from dairy far... | NaN | 40000000 | 2018 |
| 2 | MyLoanCare | Credit, Financial Services, Lending, Marketplace | Series A | 1020500.0 | Gurgaon, Haryana, India | Leading Online Loans Marketplace in India | NaN | 65000000 | 2018 |
| 3 | PayMe India | Financial Services, FinTech | Angel | 2000000 | Noida, Uttar Pradesh, India | PayMe India is an innovative FinTech organizat... | NaN | NaN | 2018 |
| 4 | Eunimart | E-Commerce Platforms, Retail, SaaS | Seed | — | Hyderabad, Andhra Pradesh, India | Eunimart is a one stop solution for merchants ... | NaN | — | 2018 |
# Printing a concise summary of the 2018 dataset (columns, non-null counts and dtypes)
Data_2018.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 526 entries, 0 to 525 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Company Name 526 non-null object 1 Industry 526 non-null object 2 Round/Series 526 non-null object 3 Amount 526 non-null object 4 Location 526 non-null object 5 About Company 526 non-null object 6 Calculation 1 non-null float64 7 Amount.1 291 non-null object 8 Funding Year 526 non-null int64 dtypes: float64(1), int64(1), object(7) memory usage: 37.1+ KB
# Giving the 2018 columns the same names the 2019-2021 datasets use
Data_2018.rename(
    {
        "Company Name": "Company/Brand",
        "Industry": "Sector",
        "Round/Series": "Stage",
        "Amount": "Amount($)",
        "Location": "HeadQuarter",
        "About Company": "What it does",
    },
    axis="columns",
    inplace=True,
)
Data_2018.head(5)
| Company/Brand | Sector | Stage | Amount($) | HeadQuarter | What it does | Calculation | Amount.1 | Funding Year | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | TheCollegeFever | Brand Marketing, Event Promotion, Marketing, S... | Seed | 250000 | Bangalore, Karnataka, India | TheCollegeFever is a hub for fun, fiesta and f... | 0.0157 | NaN | 2018 |
| 1 | Happy Cow Dairy | Agriculture, Farming | Seed | 628000 | Mumbai, Maharashtra, India | A startup which aggregates milk from dairy far... | NaN | 40000000 | 2018 |
| 2 | MyLoanCare | Credit, Financial Services, Lending, Marketplace | Series A | 1020500.0 | Gurgaon, Haryana, India | Leading Online Loans Marketplace in India | NaN | 65000000 | 2018 |
| 3 | PayMe India | Financial Services, FinTech | Angel | 2000000 | Noida, Uttar Pradesh, India | PayMe India is an innovative FinTech organizat... | NaN | NaN | 2018 |
| 4 | Eunimart | E-Commerce Platforms, Retail, SaaS | Seed | — | Hyderabad, Andhra Pradesh, India | Eunimart is a one stop solution for merchants ... | NaN | — | 2018 |
# The next step is to drop the columns that are not needed to answer the research questions.
Data_2018.drop(["Calculation", "Amount.1", "What it does", "Stage"], axis="columns", inplace=True)
Data_2018.head(5)
| Company/Brand | Sector | Amount($) | HeadQuarter | Funding Year | |
|---|---|---|---|---|---|
| 0 | TheCollegeFever | Brand Marketing, Event Promotion, Marketing, S... | 250000 | Bangalore, Karnataka, India | 2018 |
| 1 | Happy Cow Dairy | Agriculture, Farming | 628000 | Mumbai, Maharashtra, India | 2018 |
| 2 | MyLoanCare | Credit, Financial Services, Lending, Marketplace | 1020500.0 | Gurgaon, Haryana, India | 2018 |
| 3 | PayMe India | Financial Services, FinTech | 2000000 | Noida, Uttar Pradesh, India | 2018 |
| 4 | Eunimart | E-Commerce Platforms, Retail, SaaS | — | Hyderabad, Andhra Pradesh, India | 2018 |
# Renaming the Amount($) column to Amount in USD
Data_2018.rename({"Amount($)": "Amount in USD"}, axis="columns", inplace=True)
# Converting the amounts to numbers: entries that cannot be parsed become NaN
# and are then replaced by 0
Data_2018["Amount in USD"] = (
    pd.to_numeric(Data_2018["Amount in USD"], errors="coerce")
    .fillna(0, downcast="infer")
)
Data_2018.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 526 entries, 0 to 525 Data columns (total 5 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Company/Brand 526 non-null object 1 Sector 526 non-null object 2 Amount in USD 526 non-null float64 3 HeadQuarter 526 non-null object 4 Funding Year 526 non-null int64 dtypes: float64(1), int64(1), object(3) memory usage: 20.7+ KB
# Counting the missing values left in each column of the 2018 dataset
Data_2018.isna().sum()
Company/Brand 0 Sector 0 Amount in USD 0 HeadQuarter 0 Funding Year 0 dtype: int64
# Sector and HeadQuarter hold comma-separated lists; only the first entry of each is kept
for _column in ("Sector", "HeadQuarter"):
    Data_2018[_column] = Data_2018[_column].str.split(",").str.get(0)
Data_2018
| Company/Brand | Sector | Amount in USD | HeadQuarter | Funding Year | |
|---|---|---|---|---|---|
| 0 | TheCollegeFever | Brand Marketing | 250000.0 | Bangalore | 2018 |
| 1 | Happy Cow Dairy | Agriculture | 628000.0 | Mumbai | 2018 |
| 2 | MyLoanCare | Credit | 1020500.0 | Gurgaon | 2018 |
| 3 | PayMe India | Financial Services | 2000000.0 | Noida | 2018 |
| 4 | Eunimart | E-Commerce Platforms | 0.0 | Hyderabad | 2018 |
| ... | ... | ... | ... | ... | ... |
| 521 | Udaan | B2B | 225000000.0 | Bangalore | 2018 |
| 522 | Happyeasygo Group | Tourism | 0.0 | Haryana | 2018 |
| 523 | Mombay | Food and Beverage | 7500.0 | Mumbai | 2018 |
| 524 | Droni Tech | Information Technology | 549500.0 | Mumbai | 2018 |
| 525 | Netmeds | Biotechnology | 35000000.0 | Chennai | 2018 |
526 rows × 5 columns
# Replacing dash placeholders in the HeadQuarter and Sector columns with 'unknown'.
# The displayed data shows the placeholder as an em dash ("—"), which the ASCII
# hyphen alone does not match, so the hyphen, en dash and em dash are all treated
# as placeholders.
# The result is assigned back to the column: an in-place replace on a selected
# column may act on a temporary copy and leave the DataFrame unchanged.
for _column in ("HeadQuarter", "Sector"):
    Data_2018[_column] = Data_2018[_column].replace(["-", "–", "—"], "unknown")
Data_2018
| Company/Brand | Sector | Amount in USD | HeadQuarter | Funding Year | |
|---|---|---|---|---|---|
| 0 | TheCollegeFever | Brand Marketing | 250000.0 | Bangalore | 2018 |
| 1 | Happy Cow Dairy | Agriculture | 628000.0 | Mumbai | 2018 |
| 2 | MyLoanCare | Credit | 1020500.0 | Gurgaon | 2018 |
| 3 | PayMe India | Financial Services | 2000000.0 | Noida | 2018 |
| 4 | Eunimart | E-Commerce Platforms | 0.0 | Hyderabad | 2018 |
| ... | ... | ... | ... | ... | ... |
| 521 | Udaan | B2B | 225000000.0 | Bangalore | 2018 |
| 522 | Happyeasygo Group | Tourism | 0.0 | Haryana | 2018 |
| 523 | Mombay | Food and Beverage | 7500.0 | Mumbai | 2018 |
| 524 | Droni Tech | Information Technology | 549500.0 | Mumbai | 2018 |
| 525 | Netmeds | Biotechnology | 35000000.0 | Chennai | 2018 |
526 rows × 5 columns
# Displaying the cleaned 2018 dataset
Data_2018
| Company/Brand | Sector | Amount in USD | HeadQuarter | Funding Year | |
|---|---|---|---|---|---|
| 0 | TheCollegeFever | Brand Marketing | 250000.0 | Bangalore | 2018 |
| 1 | Happy Cow Dairy | Agriculture | 628000.0 | Mumbai | 2018 |
| 2 | MyLoanCare | Credit | 1020500.0 | Gurgaon | 2018 |
| 3 | PayMe India | Financial Services | 2000000.0 | Noida | 2018 |
| 4 | Eunimart | E-Commerce Platforms | 0.0 | Hyderabad | 2018 |
| ... | ... | ... | ... | ... | ... |
| 521 | Udaan | B2B | 225000000.0 | Bangalore | 2018 |
| 522 | Happyeasygo Group | Tourism | 0.0 | Haryana | 2018 |
| 523 | Mombay | Food and Beverage | 7500.0 | Mumbai | 2018 |
| 524 | Droni Tech | Information Technology | 549500.0 | Mumbai | 2018 |
| 525 | Netmeds | Biotechnology | 35000000.0 | Chennai | 2018 |
526 rows × 5 columns
# Previewing the first five rows of the 2019 dataset
Data_2019.head(5)
| Company/Brand | Founded | HeadQuarter | Sector | What it does | Founders | Investor | Amount($) | Stage | Funding Year | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Bombay Shaving | NaN | NaN | Ecommerce | Provides a range of male grooming products | Shantanu Deshpande | Sixth Sense Ventures | $6,300,000 | NaN | 2019 |
| 1 | Ruangguru | 2014.0 | Mumbai | Edtech | A learning platform that provides topic-based ... | Adamas Belva Syah Devara, Iman Usman. | General Atlantic | $150,000,000 | Series C | 2019 |
| 2 | Eduisfun | NaN | Mumbai | Edtech | It aims to make learning fun via games. | Jatin Solanki | Deepak Parekh, Amitabh Bachchan, Piyush Pandey | $28,000,000 | Fresh funding | 2019 |
| 3 | HomeLane | 2014.0 | Chennai | Interior design | Provides interior designing solutions | Srikanth Iyer, Rama Harinath | Evolvence India Fund (EIF), Pidilite Group, FJ... | $30,000,000 | Series D | 2019 |
| 4 | Nu Genes | 2004.0 | Telangana | AgriTech | It is a seed company engaged in production, pr... | Narayana Reddy Punyala | Innovation in Food and Agriculture (IFA) | $6,000,000 | NaN | 2019 |
# Printing a summary of the 2019 dataset (columns, non-null counts and dtypes)
# to spot errors and missing values.
Data_2019.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 89 entries, 0 to 88 Data columns (total 10 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Company/Brand 89 non-null object 1 Founded 60 non-null float64 2 HeadQuarter 70 non-null object 3 Sector 84 non-null object 4 What it does 89 non-null object 5 Founders 86 non-null object 6 Investor 89 non-null object 7 Amount($) 89 non-null object 8 Stage 43 non-null object 9 Funding Year 89 non-null int64 dtypes: float64(1), int64(1), object(8) memory usage: 7.1+ KB
# Removing the Founders, Stage and What it does columns from the 2019 dataset
Data_2019.drop(["Founders", "Stage", "What it does"], axis="columns", inplace=True)
Data_2019.head(5)
| Company/Brand | Founded | HeadQuarter | Sector | Investor | Amount($) | Funding Year | |
|---|---|---|---|---|---|---|---|
| 0 | Bombay Shaving | NaN | NaN | Ecommerce | Sixth Sense Ventures | $6,300,000 | 2019 |
| 1 | Ruangguru | 2014.0 | Mumbai | Edtech | General Atlantic | $150,000,000 | 2019 |
| 2 | Eduisfun | NaN | Mumbai | Edtech | Deepak Parekh, Amitabh Bachchan, Piyush Pandey | $28,000,000 | 2019 |
| 3 | HomeLane | 2014.0 | Chennai | Interior design | Evolvence India Fund (EIF), Pidilite Group, FJ... | $30,000,000 | 2019 |
| 4 | Nu Genes | 2004.0 | Telangana | AgriTech | Innovation in Food and Agriculture (IFA) | $6,000,000 | 2019 |
# Renaming the Amount($) column to Amount in USD
Data_2019.rename(columns = {"Amount($)": "Amount in USD"}, inplace = True)
# Setting undisclosed amounts to 0. This must happen while the column still holds
# the raw text: once the column is numeric, comparing it with "Undisclosed"
# never matches anything.
Updated = Data_2019["Amount in USD"] == "Undisclosed"
Data_2019.loc[Updated, "Amount in USD"] = 0
# Removing dollar signs and thousands separators from the Amount in USD column
# (raw string, so that "\$" is a regex escape and not an invalid string escape)
Data_2019["Amount in USD"] = Data_2019["Amount in USD"].replace({r"\$": "", ",": ""}, regex = True)
# Changing the amount datatype to int/float; anything that still cannot be parsed
# becomes NaN and is then replaced by 0
Data_2019["Amount in USD"] = pd.to_numeric(Data_2019["Amount in USD"], errors = "coerce").fillna(0, downcast = "infer")
Data_2019
| Company/Brand | Founded | HeadQuarter | Sector | Investor | Amount in USD | Funding Year | |
|---|---|---|---|---|---|---|---|
| 0 | Bombay Shaving | NaN | NaN | Ecommerce | Sixth Sense Ventures | 6300000 | 2019 |
| 1 | Ruangguru | 2014.0 | Mumbai | Edtech | General Atlantic | 150000000 | 2019 |
| 2 | Eduisfun | NaN | Mumbai | Edtech | Deepak Parekh, Amitabh Bachchan, Piyush Pandey | 28000000 | 2019 |
| 3 | HomeLane | 2014.0 | Chennai | Interior design | Evolvence India Fund (EIF), Pidilite Group, FJ... | 30000000 | 2019 |
| 4 | Nu Genes | 2004.0 | Telangana | AgriTech | Innovation in Food and Agriculture (IFA) | 6000000 | 2019 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 84 | Infra.Market | NaN | Mumbai | Infratech | Tiger Global, Nexus Venture Partners, Accel Pa... | 20000000 | 2019 |
| 85 | Oyo | 2013.0 | Gurugram | Hospitality | MyPreferred Transformation, Avendus Finance, S... | 693000000 | 2019 |
| 86 | GoMechanic | 2016.0 | Delhi | Automobile & Technology | Sequoia Capital | 5000000 | 2019 |
| 87 | Spinny | 2015.0 | Delhi | Automobile | Norwest Venture Partners, General Catalyst, Fu... | 50000000 | 2019 |
| 88 | Ess Kay Fincorp | NaN | Rajasthan | Banking | TPG, Norwest Venture Partners, Evolvence India | 33000000 | 2019 |
89 rows × 7 columns
# Replacing null values in the Founded, HeadQuarter and Sector columns with 'unknown'.
# The filled column is assigned back instead of calling an in-place replace on a
# selected column (which may act on a temporary copy), and fillna avoids the
# np.NaN alias that NumPy 2.0 removed.
# Founded is a float column, so it is cast to object before receiving the text
# marker and is then stored as text (years therefore read like "2014.0").
Data_2019["Founded"] = Data_2019["Founded"].astype(object).fillna("unknown").astype(str)
for _column in ("HeadQuarter", "Sector"):
    Data_2019[_column] = Data_2019[_column].fillna("unknown")
# Counting the missing values left in each column of the 2019 dataset
Data_2019.isna().sum()
Company/Brand 0 Founded 0 HeadQuarter 0 Sector 0 Investor 0 Amount in USD 0 Funding Year 0 dtype: int64
# Displaying the cleaned 2019 dataset
Data_2019
| Company/Brand | Founded | HeadQuarter | Sector | Investor | Amount in USD | Funding Year | |
|---|---|---|---|---|---|---|---|
| 0 | Bombay Shaving | unknown | unknown | Ecommerce | Sixth Sense Ventures | 6300000 | 2019 |
| 1 | Ruangguru | 2014.0 | Mumbai | Edtech | General Atlantic | 150000000 | 2019 |
| 2 | Eduisfun | unknown | Mumbai | Edtech | Deepak Parekh, Amitabh Bachchan, Piyush Pandey | 28000000 | 2019 |
| 3 | HomeLane | 2014.0 | Chennai | Interior design | Evolvence India Fund (EIF), Pidilite Group, FJ... | 30000000 | 2019 |
| 4 | Nu Genes | 2004.0 | Telangana | AgriTech | Innovation in Food and Agriculture (IFA) | 6000000 | 2019 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 84 | Infra.Market | unknown | Mumbai | Infratech | Tiger Global, Nexus Venture Partners, Accel Pa... | 20000000 | 2019 |
| 85 | Oyo | 2013.0 | Gurugram | Hospitality | MyPreferred Transformation, Avendus Finance, S... | 693000000 | 2019 |
| 86 | GoMechanic | 2016.0 | Delhi | Automobile & Technology | Sequoia Capital | 5000000 | 2019 |
| 87 | Spinny | 2015.0 | Delhi | Automobile | Norwest Venture Partners, General Catalyst, Fu... | 50000000 | 2019 |
| 88 | Ess Kay Fincorp | unknown | Rajasthan | Banking | TPG, Norwest Venture Partners, Evolvence India | 33000000 | 2019 |
89 rows × 7 columns
# Previewing the first five rows of the 2020 dataset
Data_2020.head(5)
| Company/Brand | Founded | HeadQuarter | Sector | What it does | Founders | Investor | Amount($) | Stage | Unnamed: 9 | Funding Year | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Aqgromalin | 2019 | Chennai | AgriTech | Cultivating Ideas for Profit | Prasanna Manogaran, Bharani C L | Angel investors | $200,000 | NaN | NaN | 2020 |
| 1 | Krayonnz | 2019 | Bangalore | EdTech | An academy-guardian-scholar centric ecosystem ... | Saurabh Dixit, Gurudutt Upadhyay | GSF Accelerator | $100,000 | Pre-seed | NaN | 2020 |
| 2 | PadCare Labs | 2018 | Pune | Hygiene management | Converting bio-hazardous waste to harmless waste | Ajinkya Dhariya | Venture Center | Undisclosed | Pre-seed | NaN | 2020 |
| 3 | NCOME | 2020 | New Delhi | Escrow | Escrow-as-a-service platform | Ritesh Tiwari | Venture Catalysts, PointOne Capital | $400,000 | NaN | NaN | 2020 |
| 4 | Gramophone | 2016 | Indore | AgriTech | Gramophone is an AgTech platform enabling acce... | Ashish Rajan Singh, Harshit Gupta, Nishant Mah... | Siana Capital Management, Info Edge | $340,000 | NaN | NaN | 2020 |
# Printing a concise summary of the 2020 dataset (columns, non-null counts and dtypes)
Data_2020.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1055 entries, 0 to 1054 Data columns (total 11 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Company/Brand 1055 non-null object 1 Founded 843 non-null object 2 HeadQuarter 961 non-null object 3 Sector 1042 non-null object 4 What it does 1055 non-null object 5 Founders 1043 non-null object 6 Investor 1017 non-null object 7 Amount($) 1052 non-null object 8 Stage 591 non-null object 9 Unnamed: 9 2 non-null object 10 Funding Year 1055 non-null int64 dtypes: int64(1), object(10) memory usage: 90.8+ KB
# Removing the Founders, Stage, What it does and Unnamed: 9 columns from the 2020 dataset
Data_2020.drop(["Founders", "Stage", "What it does", "Unnamed: 9"], axis="columns", inplace=True)
# Checking the remaining columns on the first five rows
Data_2020.head(5)
| Company/Brand | Founded | HeadQuarter | Sector | Investor | Amount($) | Funding Year | |
|---|---|---|---|---|---|---|---|
| 0 | Aqgromalin | 2019 | Chennai | AgriTech | Angel investors | $200,000 | 2020 |
| 1 | Krayonnz | 2019 | Bangalore | EdTech | GSF Accelerator | $100,000 | 2020 |
| 2 | PadCare Labs | 2018 | Pune | Hygiene management | Venture Center | Undisclosed | 2020 |
| 3 | NCOME | 2020 | New Delhi | Escrow | Venture Catalysts, PointOne Capital | $400,000 | 2020 |
| 4 | Gramophone | 2016 | Indore | AgriTech | Siana Capital Management, Info Edge | $340,000 | 2020 |
# Renaming the Amount($) column to Amount in USD
Data_2020.rename(columns = {"Amount($)": "Amount in USD"}, inplace = True)
# Setting undisclosed amounts to 0. This must happen while the column still holds
# the raw text: once the column is numeric, comparing it with "Undisclosed"
# never matches anything.
Updated = Data_2020["Amount in USD"] == "Undisclosed"
Data_2020.loc[Updated, "Amount in USD"] = 0
# Removing dollar signs and thousands separators from the Amount in USD column
# (raw string, so that "\$" is a regex escape and not an invalid string escape)
Data_2020["Amount in USD"] = Data_2020["Amount in USD"].replace({r"\$": "", ",": ""}, regex = True)
# Changing the Amount in USD datatype to int/float; anything that still cannot be
# parsed becomes NaN and is then replaced by 0
Data_2020["Amount in USD"] = pd.to_numeric(Data_2020["Amount in USD"], errors = "coerce").fillna(0, downcast = "infer")
Data_2020
| Company/Brand | Founded | HeadQuarter | Sector | Investor | Amount in USD | Funding Year | |
|---|---|---|---|---|---|---|---|
| 0 | Aqgromalin | 2019 | Chennai | AgriTech | Angel investors | 200000.0 | 2020 |
| 1 | Krayonnz | 2019 | Bangalore | EdTech | GSF Accelerator | 100000.0 | 2020 |
| 2 | PadCare Labs | 2018 | Pune | Hygiene management | Venture Center | 0.0 | 2020 |
| 3 | NCOME | 2020 | New Delhi | Escrow | Venture Catalysts, PointOne Capital | 400000.0 | 2020 |
| 4 | Gramophone | 2016 | Indore | AgriTech | Siana Capital Management, Info Edge | 340000.0 | 2020 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 1050 | Leverage Edu | NaN | Delhi | Edtech | DSG Consumer Partners, Blume Ventures | 1500000.0 | 2020 |
| 1051 | EpiFi | NaN | NaN | Fintech | Sequoia India, Ribbit Capital | 13200000.0 | 2020 |
| 1052 | Purplle | 2012 | Mumbai | Cosmetics | Verlinvest | 8000000.0 | 2020 |
| 1053 | Shuttl | 2015 | Delhi | Transport | SIG Global India Fund LLP. | 8043000.0 | 2020 |
| 1054 | Pando | 2017 | Chennai | Logitech | Chiratae Ventures | 9000000.0 | 2020 |
1055 rows × 7 columns
# Counting the missing values in each column of the 2020 dataset
Data_2020.isna().sum()
Company/Brand 0 Founded 212 HeadQuarter 94 Sector 13 Investor 38 Amount in USD 0 Funding Year 0 dtype: int64
# Replacing null values in the Founded, HeadQuarter, Sector and Investor columns
# with 'unknown'. The filled column is assigned back instead of calling an
# in-place replace on a selected column (which may act on a temporary copy), and
# fillna avoids the np.NaN alias that NumPy 2.0 removed.
for _column in ("Founded", "HeadQuarter", "Sector", "Investor"):
    Data_2020[_column] = Data_2020[_column].fillna("unknown")
# Confirming that no missing values remain in the 2020 dataset
Data_2020.isna().sum()
Company/Brand 0 Founded 0 HeadQuarter 0 Sector 0 Investor 0 Amount in USD 0 Funding Year 0 dtype: int64
# Displaying the cleaned 2020 dataset
Data_2020
| Company/Brand | Founded | HeadQuarter | Sector | Investor | Amount in USD | Funding Year | |
|---|---|---|---|---|---|---|---|
| 0 | Aqgromalin | 2019 | Chennai | AgriTech | Angel investors | 200000.0 | 2020 |
| 1 | Krayonnz | 2019 | Bangalore | EdTech | GSF Accelerator | 100000.0 | 2020 |
| 2 | PadCare Labs | 2018 | Pune | Hygiene management | Venture Center | 0.0 | 2020 |
| 3 | NCOME | 2020 | New Delhi | Escrow | Venture Catalysts, PointOne Capital | 400000.0 | 2020 |
| 4 | Gramophone | 2016 | Indore | AgriTech | Siana Capital Management, Info Edge | 340000.0 | 2020 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 1050 | Leverage Edu | unknown | Delhi | Edtech | DSG Consumer Partners, Blume Ventures | 1500000.0 | 2020 |
| 1051 | EpiFi | unknown | unknown | Fintech | Sequoia India, Ribbit Capital | 13200000.0 | 2020 |
| 1052 | Purplle | 2012 | Mumbai | Cosmetics | Verlinvest | 8000000.0 | 2020 |
| 1053 | Shuttl | 2015 | Delhi | Transport | SIG Global India Fund LLP. | 8043000.0 | 2020 |
| 1054 | Pando | 2017 | Chennai | Logitech | Chiratae Ventures | 9000000.0 | 2020 |
1055 rows × 7 columns
# Previewing the first five rows of the 2021 dataset before cleaning it
Data_2021.head(5)
| Company/Brand | Founded | HeadQuarter | Sector | What it does | Founders | Investor | Amount($) | Stage | Funding Year | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Unbox Robotics | 2019.0 | Bangalore | AI startup | Unbox Robotics builds on-demand AI-driven ware... | Pramod Ghadge, Shahid Memon | BEENEXT, Entrepreneur First | $1,200,000 | Pre-series A | 2021 |
| 1 | upGrad | 2015.0 | Mumbai | EdTech | UpGrad is an online higher education platform. | Mayank Kumar, Phalgun Kompalli, Ravijot Chugh,... | Unilazer Ventures, IIFL Asset Management | $120,000,000 | NaN | 2021 |
| 2 | Lead School | 2012.0 | Mumbai | EdTech | LEAD School offers technology based school tra... | Smita Deorah, Sumeet Mehta | GSV Ventures, Westbridge Capital | $30,000,000 | Series D | 2021 |
| 3 | Bizongo | 2015.0 | Mumbai | B2B E-commerce | Bizongo is a business-to-business online marke... | Aniket Deb, Ankit Tomar, Sachin Agrawal | CDC Group, IDG Capital | $51,000,000 | Series C | 2021 |
| 4 | FypMoney | 2021.0 | Gurugram | FinTech | FypMoney is Digital NEO Bank for Teenagers, em... | Kapil Banwari | Liberatha Kallat, Mukesh Yadav, Dinesh Nagpal | $2,000,000 | Seed | 2021 |
# Printing a concise summary of the 2021 dataset (columns, non-null counts and dtypes)
Data_2021.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1209 entries, 0 to 1208 Data columns (total 10 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Company/Brand 1209 non-null object 1 Founded 1208 non-null float64 2 HeadQuarter 1208 non-null object 3 Sector 1209 non-null object 4 What it does 1209 non-null object 5 Founders 1205 non-null object 6 Investor 1147 non-null object 7 Amount($) 1206 non-null object 8 Stage 781 non-null object 9 Funding Year 1209 non-null int64 dtypes: float64(1), int64(1), object(8) memory usage: 94.6+ KB
# Dropping the Founders, Stage and What it does columns
Data_2021.drop(columns = ["Founders", "Stage", "What it does"], inplace = True)
# Renaming the Amount($) column to Amount in USD
Data_2021.rename(columns = {"Amount($)": "Amount in USD"}, inplace = True)
# Removing dollar signs and thousands separators from the amount column
# (raw string, so that "\$" is a regex escape and not an invalid string escape)
Data_2021["Amount in USD"] = Data_2021["Amount in USD"].replace({r"\$": "", ",": ""}, regex = True)
# Changing the amount datatype to int/float; entries that cannot be parsed
# become NaN and are then replaced by 0
Data_2021["Amount in USD"] = pd.to_numeric(Data_2021["Amount in USD"], errors = "coerce").fillna(0, downcast = "infer")
# Replacing null values in the Founded, HeadQuarter and Investor columns with 'unknown'.
# The filled column is assigned back instead of calling an in-place replace on a
# selected column (which may act on a temporary copy), and fillna avoids the
# np.NaN alias that NumPy 2.0 removed.
# Founded is a float column, so it is cast to object before receiving the text
# marker and is then stored as text (years therefore read like "2019.0").
Data_2021["Founded"] = Data_2021["Founded"].astype(object).fillna("unknown").astype(str)
for _column in ("HeadQuarter", "Investor"):
    Data_2021[_column] = Data_2021[_column].fillna("unknown")
Data_2021.head()
| Company/Brand | Founded | HeadQuarter | Sector | Investor | Amount in USD | Funding Year | |
|---|---|---|---|---|---|---|---|
| 0 | Unbox Robotics | 2019.0 | Bangalore | AI startup | BEENEXT, Entrepreneur First | 1200000 | 2021 |
| 1 | upGrad | 2015.0 | Mumbai | EdTech | Unilazer Ventures, IIFL Asset Management | 120000000 | 2021 |
| 2 | Lead School | 2012.0 | Mumbai | EdTech | GSV Ventures, Westbridge Capital | 30000000 | 2021 |
| 3 | Bizongo | 2015.0 | Mumbai | B2B E-commerce | CDC Group, IDG Capital | 51000000 | 2021 |
| 4 | FypMoney | 2021.0 | Gurugram | FinTech | Liberatha Kallat, Mukesh Yadav, Dinesh Nagpal | 2000000 | 2021 |
# Confirming that no missing values remain in the 2021 dataset
Data_2021.isna().sum()
Company/Brand 0 Founded 0 HeadQuarter 0 Sector 0 Investor 0 Amount in USD 0 Funding Year 0 dtype: int64
# Displaying the cleaned 2021 dataset
Data_2021
| Company/Brand | Founded | HeadQuarter | Sector | Investor | Amount in USD | Funding Year | |
|---|---|---|---|---|---|---|---|
| 0 | Unbox Robotics | 2019.0 | Bangalore | AI startup | BEENEXT, Entrepreneur First | 1200000 | 2021 |
| 1 | upGrad | 2015.0 | Mumbai | EdTech | Unilazer Ventures, IIFL Asset Management | 120000000 | 2021 |
| 2 | Lead School | 2012.0 | Mumbai | EdTech | GSV Ventures, Westbridge Capital | 30000000 | 2021 |
| 3 | Bizongo | 2015.0 | Mumbai | B2B E-commerce | CDC Group, IDG Capital | 51000000 | 2021 |
| 4 | FypMoney | 2021.0 | Gurugram | FinTech | Liberatha Kallat, Mukesh Yadav, Dinesh Nagpal | 2000000 | 2021 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 1204 | Gigforce | 2019.0 | Gurugram | Staffing & Recruiting | Endiya Partners | 3000000 | 2021 |
| 1205 | Vahdam | 2015.0 | New Delhi | Food & Beverages | IIFL AMC | 20000000 | 2021 |
| 1206 | Leap Finance | 2019.0 | Bangalore | Financial Services | Owl Ventures | 55000000 | 2021 |
| 1207 | CollegeDekho | 2015.0 | Gurugram | EdTech | Winter Capital, ETS, Man Capital | 26000000 | 2021 |
| 1208 | WeRize | 2019.0 | Bangalore | Financial Services | 3one4 Capital, Kalaari Capital | 8000000 | 2021 |
1209 rows × 7 columns
# Stacking the four yearly datasets into a single table.
# ignore_index=True gives the result a fresh 0..n-1 index; without it every
# yearly index restarts at 0, so the same row label appears up to four times and
# label-based selection or alignment becomes ambiguous.
# Columns a dataset does not have (Founded and Investor for 2018) are filled with NaN.
Columns = [Data_2018, Data_2019, Data_2020, Data_2021]  # the DataFrames to stack, despite the name
Merged_Data = pd.concat(Columns, ignore_index = True)
Merged_Data
| Company/Brand | Sector | Amount in USD | HeadQuarter | Funding Year | Founded | Investor | |
|---|---|---|---|---|---|---|---|
| 0 | TheCollegeFever | Brand Marketing | 250000.0 | Bangalore | 2018 | NaN | NaN |
| 1 | Happy Cow Dairy | Agriculture | 628000.0 | Mumbai | 2018 | NaN | NaN |
| 2 | MyLoanCare | Credit | 1020500.0 | Gurgaon | 2018 | NaN | NaN |
| 3 | PayMe India | Financial Services | 2000000.0 | Noida | 2018 | NaN | NaN |
| 4 | Eunimart | E-Commerce Platforms | 0.0 | Hyderabad | 2018 | NaN | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 1204 | Gigforce | Staffing & Recruiting | 3000000.0 | Gurugram | 2021 | 2019.0 | Endiya Partners |
| 1205 | Vahdam | Food & Beverages | 20000000.0 | New Delhi | 2021 | 2015.0 | IIFL AMC |
| 1206 | Leap Finance | Financial Services | 55000000.0 | Bangalore | 2021 | 2019.0 | Owl Ventures |
| 1207 | CollegeDekho | EdTech | 26000000.0 | Gurugram | 2021 | 2015.0 | Winter Capital, ETS, Man Capital |
| 1208 | WeRize | Financial Services | 8000000.0 | Bangalore | 2021 | 2019.0 | 3one4 Capital, Kalaari Capital |
2879 rows × 7 columns
# Removing the Founded and Investor columns from the merged dataset
Merged_Data.drop(["Founded", "Investor"], axis="columns", inplace=True)
Merged_Data
| Company/Brand | Sector | Amount in USD | HeadQuarter | Funding Year | |
|---|---|---|---|---|---|
| 0 | TheCollegeFever | Brand Marketing | 250000.0 | Bangalore | 2018 |
| 1 | Happy Cow Dairy | Agriculture | 628000.0 | Mumbai | 2018 |
| 2 | MyLoanCare | Credit | 1020500.0 | Gurgaon | 2018 |
| 3 | PayMe India | Financial Services | 2000000.0 | Noida | 2018 |
| 4 | Eunimart | E-Commerce Platforms | 0.0 | Hyderabad | 2018 |
| ... | ... | ... | ... | ... | ... |
| 1204 | Gigforce | Staffing & Recruiting | 3000000.0 | Gurugram | 2021 |
| 1205 | Vahdam | Food & Beverages | 20000000.0 | New Delhi | 2021 |
| 1206 | Leap Finance | Financial Services | 55000000.0 | Bangalore | 2021 |
| 1207 | CollegeDekho | EdTech | 26000000.0 | Gurugram | 2021 |
| 1208 | WeRize | Financial Services | 8000000.0 | Bangalore | 2021 |
2879 rows × 5 columns
# Removing fully identical rows from the merged dataset, keeping the first occurrence
Merged_Data.drop_duplicates(subset=None, keep="first", inplace=True)
Merged_Data
| Company/Brand | Sector | Amount in USD | HeadQuarter | Funding Year | |
|---|---|---|---|---|---|
| 0 | TheCollegeFever | Brand Marketing | 250000.0 | Bangalore | 2018 |
| 1 | Happy Cow Dairy | Agriculture | 628000.0 | Mumbai | 2018 |
| 2 | MyLoanCare | Credit | 1020500.0 | Gurgaon | 2018 |
| 3 | PayMe India | Financial Services | 2000000.0 | Noida | 2018 |
| 4 | Eunimart | E-Commerce Platforms | 0.0 | Hyderabad | 2018 |
| ... | ... | ... | ... | ... | ... |
| 1204 | Gigforce | Staffing & Recruiting | 3000000.0 | Gurugram | 2021 |
| 1205 | Vahdam | Food & Beverages | 20000000.0 | New Delhi | 2021 |
| 1206 | Leap Finance | Financial Services | 55000000.0 | Bangalore | 2021 |
| 1207 | CollegeDekho | EdTech | 26000000.0 | Gurugram | 2021 |
| 1208 | WeRize | Financial Services | 8000000.0 | Bangalore | 2021 |
2839 rows × 5 columns
# Making sure Funding Year is numeric: entries that cannot be parsed become NaN
# and are then replaced by 0
Merged_Data["Funding Year"] = (
    pd.to_numeric(Merged_Data["Funding Year"], errors="coerce")
    .fillna(0, downcast="infer")
)
Merged_Data
| Company/Brand | Sector | Amount in USD | HeadQuarter | Funding Year | |
|---|---|---|---|---|---|
| 0 | TheCollegeFever | Brand Marketing | 250000.0 | Bangalore | 2018 |
| 1 | Happy Cow Dairy | Agriculture | 628000.0 | Mumbai | 2018 |
| 2 | MyLoanCare | Credit | 1020500.0 | Gurgaon | 2018 |
| 3 | PayMe India | Financial Services | 2000000.0 | Noida | 2018 |
| 4 | Eunimart | E-Commerce Platforms | 0.0 | Hyderabad | 2018 |
| ... | ... | ... | ... | ... | ... |
| 1204 | Gigforce | Staffing & Recruiting | 3000000.0 | Gurugram | 2021 |
| 1205 | Vahdam | Food & Beverages | 20000000.0 | New Delhi | 2021 |
| 1206 | Leap Finance | Financial Services | 55000000.0 | Bangalore | 2021 |
| 1207 | CollegeDekho | EdTech | 26000000.0 | Gurugram | 2021 |
| 1208 | WeRize | Financial Services | 8000000.0 | Bangalore | 2021 |
2839 rows × 5 columns
# Counting how often each sector occurs now that the duplicate rows are gone,
# then showing the 60 most frequent sectors
group_by_sector = Merged_Data["Sector"].value_counts(sort=True, ascending=False)
group_by_sector.iloc[:60]
FinTech 170 EdTech 146 Financial Services 86 Fintech 84 E-commerce 73 Edtech 72 Automotive 52 AgriTech 43 Food & Beverages 38 Logistics 37 Gaming 35 Healthcare 35 Information Technology & Services 34 Healthtech 32 HealthCare 31 — 30 SaaS 30 SaaS startup 28 Consumer Goods 28 Media 27 HealthTech 27 Finance 27 Health Care 26 Retail 25 Tech Startup 25 E-learning 24 Ecommerce 23 AI startup 22 E-Commerce 22 Hospitality 22 Apps 20 Computer Software 20 AI 20 Agritech 19 Tech 19 Fashion 19 Information Technology 19 unknown 18 Health, Wellness & Fitness 17 Internet 17 B2B 16 Logistics & Supply Chain 16 Food and Beverage 16 Entertainment 16 Education 15 Food 15 Artificial Intelligence 15 Foodtech 14 Real Estate 13 Fitness 13 Biotechnology 13 Cosmetics 13 Hospital & Health Care 12 Computer software 11 Banking 11 Automobile 11 E-Learning 11 Transportation 11 IoT 11 Tech company 10 Name: Sector, dtype: int64
# Display the complete sector tally; pandas abbreviates the middle rows of a
# long Series and reports its total length at the bottom.
group_by_sector
FinTech 170
EdTech 146
Financial Services 86
Fintech 84
E-commerce 73
...
Startup laboratory 1
Online credit management startup 1
Battery design 1
Eyeglasses 1
Staffing & Recruiting 1
Name: Sector, Length: 596, dtype: int64
# Collapse spelling/casing variants of the same sector into one canonical
# label. Keys are the variants found in the data, values the label to keep.
sector_aliases = {
    'EdTech': 'Edtech',
    'FinTech': 'Fintech',
    'HealthCare': 'Healthcare',
    'SaaS startup': 'SaaS',
    'HealthTech': 'Healthtech',
    'Ecommerce': 'E-commerce',
    'Food': 'Foodtech',
    'AI startup': 'AI',
    'AgriTech': 'Agritech',
    'Logistics & Supply Chain': 'Logistics',
    'IT': 'Information Technology',
    'Automobile': 'Automotive',
    'Tech': 'Tech Startup',
}
# The nested dict restricts the replacement to the "Sector" column only.
Merged_Data.replace({'Sector': sector_aliases}, inplace=True)
Merged_Data
| Company/Brand | Sector | Amount in USD | HeadQuarter | Funding Year | |
|---|---|---|---|---|---|
| 0 | TheCollegeFever | Brand Marketing | 250000.0 | Bangalore | 2018 |
| 1 | Happy Cow Dairy | Agriculture | 628000.0 | Mumbai | 2018 |
| 2 | MyLoanCare | Credit | 1020500.0 | Gurgaon | 2018 |
| 3 | PayMe India | Financial Services | 2000000.0 | Noida | 2018 |
| 4 | Eunimart | E-Commerce Platforms | 0.0 | Hyderabad | 2018 |
| ... | ... | ... | ... | ... | ... |
| 1204 | Gigforce | Staffing & Recruiting | 3000000.0 | Gurugram | 2021 |
| 1205 | Vahdam | Food & Beverages | 20000000.0 | New Delhi | 2021 |
| 1206 | Leap Finance | Financial Services | 55000000.0 | Bangalore | 2021 |
| 1207 | CollegeDekho | Edtech | 26000000.0 | Gurugram | 2021 |
| 1208 | WeRize | Financial Services | 8000000.0 | Bangalore | 2021 |
2839 rows × 5 columns
# Recount the sector labels so the tally reflects the current contents of the
# "Sector" column, then display it.
sector_column = Merged_Data.loc[:, "Sector"]
group_by_sector = sector_column.value_counts()
group_by_sector
Fintech 254
Edtech 218
E-commerce 96
Financial Services 86
Healthcare 66
...
Online credit management startup 1
Battery design 1
Eyeglasses 1
Online financial service 1
Staffing & Recruiting 1
Name: Sector, Length: 583, dtype: int64
# The default figure size must be set before the figure is created; setting it
# after plt.bar() only affects figures drawn later.
plt.rcParams['figure.figsize'] = (20,10)

# Rank start-ups by the total amount they raised. The previous version plotted
# how many rows each company had, which did not match the chart's title and
# "Amount received" axis label.
top_funded = Merged_Data.groupby('Company/Brand')['Amount in USD'].sum().nlargest(10)
x = top_funded.index
y = top_funded.values

plt.bar(x,y)
plt.xticks(fontsize = 15)
plt.yticks(fontsize = 15)
plt.title('TOP FUNDED START-UPS IN INDIA', fontsize = 19, fontweight = 'bold')
plt.xlabel('Start-ups', fontsize = 19, fontweight = 'bold')
plt.ylabel('Amount received', fontsize = 19, fontweight = 'bold')
Text(0, 0.5, 'Amount received')
# Pie chart of the 8 most frequent headquarters cities, sized by how many rows
# of Merged_Data list each city. The counts are computed once and reused for
# both the slice values and the slice labels.
top_headquarters = Merged_Data['HeadQuarter'].value_counts()[:8]
fig1 = go.Figure(
    data=go.Pie(
        values=top_headquarters.values,
        labels=top_headquarters.index,
        # The chart contains no sector information, so the title describes
        # what is actually plotted.
        title='Top 8 headquarters cities by share of funded start-ups',
    )
)
fig1.show()
# The default figure size must be set before the figure is created; setting it
# after plotting only affects figures drawn later.
plt.rcParams['figure.figsize'] = (20,10)

# Total funding per year. Selecting the amount column before summing avoids
# aggregating the unrelated text columns. groupby sorts by year, so the index
# and the totals are already aligned.
yearly_funding = Merged_Data.groupby('Funding Year')['Amount in USD'].sum()
d = list(yearly_funding.index)
c = list(yearly_funding)

# Scale to billions of USD so the plotted values agree with the y-axis label.
amount_in_billions = [total / 1e9 for total in c]

# x/y are passed by keyword: seaborn deprecated positional data arguments.
sns.scatterplot(x = d, y = amount_in_billions)
plt.plot(d, amount_in_billions)
plt.xlabel('Year', fontsize = 22, fontweight = 'bold')
plt.ylabel('Amount(USD) in billions', fontsize = 22, fontweight = 'bold')
plt.xticks(fontsize = 15)
plt.yticks(fontsize = 15)
plt.title('Amount Funded over the Years', fontsize = 22, fontweight = 'bold')
# Process-wide setting: silences all warnings from this point onwards.
warnings.filterwarnings('ignore')
# The default figure size must be set before the figure is created; setting it
# after plotting only affects figures drawn later.
plt.rcParams['figure.figsize'] = (18,8)

# One bar per funding year, its height being the number of rows in that year.
# The column is passed as `x=` because seaborn deprecated positional data
# arguments.
sns.countplot(x = Merged_Data['Funding Year'])
plt.xticks(fontsize = 15)
plt.yticks(fontsize = 15)
plt.xlabel('Year', fontsize = 22, fontweight = 'bold')
plt.ylabel('No. of Startups Funded', fontsize = 22, fontweight = 'bold')
plt.title('No of Startups Funded Over the Years', fontsize = 22, fontweight = 'bold')
# Process-wide setting: silences all warnings from this point onwards.
warnings.filterwarnings('ignore')
# Horizontal bar chart of the ten most frequent sector labels.
plt.figure(figsize=(25, 18))

top_sectors = group_by_sector.head(10)
sns.barplot(x=top_sectors.values, y=top_sectors.index)

# Shared styling for the tick labels and for the axis/title text.
tick_size = 17
heading_style = {"fontsize": 24, "fontweight": "bold"}

plt.xticks(fontsize=tick_size)
plt.yticks(fontsize=tick_size)
# NOTE(review): the bar lengths are row counts per sector taken from
# group_by_sector; confirm that "Number of Investors" is the intended label.
plt.xlabel("Number of Investors", **heading_style)
plt.ylabel("Sector", **heading_style)
plt.title("SECTORS PREFERRED BY INVESTORS", **heading_style)
Text(0.5, 1.0, 'SECTORS PREFERRED BY INVESTORS')